/*    Copyright 2010 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.matching;

import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

public class AbelianPattern {
	/** The pattern itself in terms of frequencies of occurence. */
	private int[] frequencies;
	/** Number of different characters in the pattern. */
	private int differentChars;
	/** Total length of the pattern. */
	private int length;
	
	/** Create empty pattern. */
	public AbelianPattern() {
		frequencies = new int[256];
		differentChars = 0;
		length=0;
	}
	
	/** Create pattern with initial letter frequencies. */
	public AbelianPattern(int[] frequencies) {
		differentChars=0;
		length=0;
		for (int i=0; i<frequencies.length; ++i) {
			if (frequencies[i]>0) {
				++differentChars;
				length+=frequencies[i];
			}
		}
		this.frequencies=frequencies;
	}
	
	/** Expects pattern in format like "a10;b3;c7". */
	public AbelianPattern(String s) {
		frequencies = new int[256];
		differentChars = 0;
		length = 0;
		StringTokenizer st = new StringTokenizer(s,";",false);
		while (st.hasMoreTokens()) {
			String token = st.nextToken();
			char c = token.charAt(0);
			int count = Integer.parseInt(token.substring(1));
			if (count==0) continue;
			if (frequencies[(int)c]==0) ++differentChars;
			frequencies[(int)c]+=count;
			length+=count;
		}
	}
	
	/** Modify pattern by specifying the number of occurences of character c. */
	public void set(char c, int count) {
		if (frequencies[(int)c]!=0) {
			--differentChars;
			length-=frequencies[(int)c];
			frequencies[(int)c]=0;
		}
		if (count>0) {
			++differentChars;
			length+=count;
			frequencies[(int)c]=count;
		}
	}
	
	/** Matches abelian pattern against text s und returns the number of matches.
	 *  @param matches If not null, the match position are stored into this list. 
	 */
	public int match(String s, List<Integer> matches) {
		if (matches!=null) matches.clear();
		// matches so far
		int matchCount = 0;
		// chars we still need to see
		int[] diff = new int[256];
		System.arraycopy(frequencies, 0, diff, 0, frequencies.length>256?256:frequencies.length);
		// how many entries in diff are not zero?
		int notZero = differentChars;
		for (int i=0; i<s.length(); ++i) {
			// handle character sliding into window
			int c = (int)(s.charAt(i));
			if (diff[c]==0) ++notZero;
			diff[c]-=1;
			if (diff[c]==0) --notZero;
			// handle character sliding out of window
			if (i>=length) {
				c = (int)(s.charAt(i-length));
				if (diff[c]==0) ++notZero;
				diff[c]+=1;
				if (diff[c]==0) --notZero;
			}
			// check if we found a match
			if (notZero==0) {
				matchCount+=1;
				if (matches!=null) matches.add(i-length+1);
			}
		}
		return matchCount;
	}
	
	/** Helper function for generateAllPatterns which spreads a character over a string. */
	private void fillPositions(char[] s, int[] positions, char c, int count, int startIdx, List<char[]> resultList) {
		if (count==0) {
			resultList.add(s);
			return;
		}
		for (int i=startIdx; i<positions.length-count+1; ++i) {
			char[] s_new = new char[s.length];
			System.arraycopy(s, 0, s_new, 0, s.length);
			s_new[positions[i]]=c;
			fillPositions(s_new, positions, c, count-1, i+1, resultList);
		}
	}
	
	/** Generates a list of all patterns which are matched by the abelian pattern. */
	public List<String> generateAllPatterns() {
		List<char[]> l = new ArrayList<char[]>();
		char[] seed = new char[length];
		for (int i=0; i<length; ++i) seed[i]='.';
		l.add(seed);
		for (int i=0; i<frequencies.length; ++i) {
			if (frequencies[i]==0) continue;
			List<char[]> new_l = new ArrayList<char[]>();
			for (char[] s : l) {
				// positions to be filled
				int[] positions = new int[length];
				int k = 0;
				for (int j=0; j<length; ++j){
					if (s[j]=='.') positions[k++]=j;
				}
				int[] p_new = new int[k];
				System.arraycopy(positions, 0, p_new, 0, k);
				positions=p_new;
				fillPositions(s, positions, (char)i, frequencies[i], 0, new_l);
			}
			l=new_l;
		}
		ArrayList<String> result = new ArrayList<String>(l.size());
		for (char[] s : l) result.add(new String(s));
		return result;
	}
	
	/** Returns the number of strings that match this abelian pattern. This
	 *  is the length of the list that would be returned by generateAllPatterns(). */
	public long countAllStrings() {
		// this method uses the formula:
		// result = this.length! / (frequencies[0]!*...*frequencies[255]!)
		// where ! denotes the factorial
		long result = 1;
		// 1) determine largest frequency
		int largestFrequency = 0;
		int largestFrequencyIndex = -1;
		for (int i=0; i<frequencies.length; ++i) {
			if (frequencies[i]>largestFrequency) {
				largestFrequency=frequencies[i];
				largestFrequencyIndex=i;
			}
		}
		// 2) determine multiplicity of factors in the denominator
		int largestDenFactor = 1;
		int[] denFactors = new int[length+1];
		for (int i=0; i<frequencies.length; ++i) {
			if (i==largestFrequencyIndex) continue;
			if (frequencies[i]>largestDenFactor) largestDenFactor=frequencies[i];
			for (int factor=2; factor<=frequencies[i]; ++factor) {
				denFactors[factor]+=1;
			}
		}
		// 3) main loop
		for (int factor=largestFrequency+1; factor<=length; ++factor) {
			result*=factor;
			// try to reduce the fraction
			for (int i=largestDenFactor; i>=2; --i) {
				if (denFactors[i]==0) continue;
				if (result%i==0) {
					result/=i;
					denFactors[i]-=1;
				}
			}
		}
		return result;
	}
	
	public int length() { return length; }
	
	public int[] getFrequencies() { return frequencies; }
	
	public String toString() {
		boolean first = true;
		StringBuffer sb = new StringBuffer();
		for (int i=0; i<frequencies.length; ++i) {
			if (frequencies[i]>0) {
				if (!first) {
					sb.append(';');
				} else {
					first=false;
				}
				sb.append(String.format("%s%d", (char)i, frequencies[i]));
			}
		}
		return sb.toString();
	}
}
